<?php
/*******************************************************************************
Version: 1.11 ($Rev: 175 $)
Website: http://sourceforge.net/projects/simplehtmldom/
Author: S.C. Chen <me578022@gmail.com>
Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
Contributions by:
    Yousuke Kumakura (Attribute filters)
    Vadim Voituk (Negative indexes supports of "find" method)
    Antcs (Constructor with automatically load contents either text or file/url)
Licensed under The MIT License
Redistributions of files must retain the above copyright notice.
 *******************************************************************************/

define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// helper functions
// -----------------------------------------------------------------------------
// get html dom form file
function file_get_html() {
	$dom = new simple_html_dom();
	$args = func_get_args();
	$dom->load(call_user_func_array('file_get_contents', $args), true);
	return $dom;
}

// get html dom form string
function str_get_html($str, $lowercase = true) {
	$dom = new simple_html_dom();
	$dom->load($str, $lowercase);
	return $dom;
}

// dump html dom tree
function dump_html_tree($node, $show_attr = true, $deep = 0) {
	$lead = str_repeat('    ', $deep);
	echo $lead . $node->tag;
	if($show_attr && count($node->attr) > 0) {
		echo '(';
		foreach($node->attr as $k => $v)
			echo "[$k]=>\"" . $node->$k . '", ';
		echo ')';
	}
	echo "\n";
	
	foreach($node->nodes as $c)
		dump_html_tree($c, $show_attr, $deep + 1);
}

// get dom form file (deprecated)
function file_get_dom() {
	$dom = new simple_html_dom();
	$args = func_get_args();
	$dom->load(call_user_func_array('file_get_contents', $args), true);
	return $dom;
}

// get dom form string (deprecated)
function str_get_dom($str, $lowercase = true) {
	$dom = new simple_html_dom();
	$dom->load($str, $lowercase);
	return $dom;
}

// simple html dom node
// -----------------------------------------------------------------------------
class simple_html_dom_node {
	public $nodetype = HDOM_TYPE_TEXT;
	public $tag = 'text';
	public $attr = array();
	public $children = array();
	public $nodes = array();
	public $parent = null;
	public $_ = array();
	private $dom = null;
	
	function __construct($dom) {
		$this->dom = $dom;
		$dom->nodes[] = $this;
	}
	
	function __destruct() {
		$this->clear();
	}
	
	function __toString() {
		return $this->outertext();
	}
	
	// clean up memory due to php5 circular references memory leak...
	function clear() {
		$this->dom = null;
		$this->nodes = null;
		$this->parent = null;
		$this->children = null;
	}
	
	// dump node's tree
	function dump($show_attr = true) {
		dump_html_tree($this, $show_attr);
	}
	
	// returns the parent of node
	function parent() {
		return $this->parent;
	}
	
	// returns children of node
	function children($idx = -1) {
		if($idx === -1)
			return $this->children;
		if(isset($this->children[$idx]))
			return $this->children[$idx];
		return null;
	}
	
	// returns the first child of node
	function first_child() {
		if(count($this->children) > 0)
			return $this->children[0];
		return null;
	}
	
	// returns the last child of node
	function last_child() {
		if(($count = count($this->children)) > 0)
			return $this->children[$count - 1];
		return null;
	}
	
	// returns the next sibling of node    
	function next_sibling() {
		if($this->parent === null)
			return null;
		$idx = 0;
		$count = count($this->parent->children);
		while($idx < $count && $this !== $this->parent->children[$idx])
			++$idx;
		if(++$idx >= $count)
			return null;
		return $this->parent->children[$idx];
	}
	
	// returns the previous sibling of node
	function prev_sibling() {
		if($this->parent === null)
			return null;
		$idx = 0;
		$count = count($this->parent->children);
		while($idx < $count && $this !== $this->parent->children[$idx])
			++$idx;
		if(--$idx < 0)
			return null;
		return $this->parent->children[$idx];
	}
	
	// get dom node's inner html
	function innertext() {
		if(isset($this->_[HDOM_INFO_INNER]))
			return $this->_[HDOM_INFO_INNER];
		if(isset($this->_[HDOM_INFO_TEXT]))
			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
		
		$ret = '';
		foreach($this->nodes as $n)
			$ret .= $n->outertext();
		return $ret;
	}
	
	// get dom node's outer text (with tag)
	function outertext() {
		if($this->tag === 'root')
			return $this->innertext();
			
		// trigger callback
		if($this->dom->callback !== null)
			call_user_func_array($this->dom->callback, array($this));
		
		if(isset($this->_[HDOM_INFO_OUTER]))
			return $this->_[HDOM_INFO_OUTER];
		if(isset($this->_[HDOM_INFO_TEXT]))
			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
			
		// render begin tag
		$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
		
		// render inner text
		if(isset($this->_[HDOM_INFO_INNER]))
			$ret .= $this->_[HDOM_INFO_INNER];
		else {
			foreach($this->nodes as $n)
				$ret .= $n->outertext();
		}
		
		// render end tag
		if(isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0)
			$ret .= '</' . $this->tag . '>';
		return $ret;
	}
	
	// get dom node's plain text
	function text() {
		if(isset($this->_[HDOM_INFO_INNER]))
			return $this->_[HDOM_INFO_INNER];
		switch($this->nodetype) {
			case HDOM_TYPE_TEXT:
				return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
			case HDOM_TYPE_COMMENT:
				return '';
			case HDOM_TYPE_UNKNOWN:
				return '';
		}
		if(strcasecmp($this->tag, 'script') === 0)
			return '';
		if(strcasecmp($this->tag, 'style') === 0)
			return '';
		
		$ret = '';
		foreach($this->nodes as $n)
			$ret .= $n->text();
		return $ret;
	}
	
	function xmltext() {
		$ret = $this->innertext();
		$ret = str_ireplace('<![CDATA[', '', $ret);
		$ret = str_replace(']]>', '', $ret);
		return $ret;
	}
	
	// build node's text with tag
	function makeup() {
		// text, comment, unknown
		if(isset($this->_[HDOM_INFO_TEXT]))
			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
		
		$ret = '<' . $this->tag;
		$i = -1;
		
		foreach($this->attr as $key => $val) {
			++$i;
			
			// skip removed attribute
			if($val === null || $val === false)
				continue;
			
			$ret .= $this->_[HDOM_INFO_SPACE][$i][0];
			//no value attr: nowrap, checked selected...
			if($val === true)
				$ret .= $key;
			else {
				switch($this->_[HDOM_INFO_QUOTE][$i]) {
					case HDOM_QUOTE_DOUBLE:
						$quote = '"';
						break;
					case HDOM_QUOTE_SINGLE:
						$quote = '\'';
						break;
					default:
						$quote = '';
				}
				$ret .= $key . $this->_[HDOM_INFO_SPACE][$i][1] . '=' . $this->_[HDOM_INFO_SPACE][$i][2] . $quote . $val . $quote;
			}
		}
		$ret = $this->dom->restore_noise($ret);
		return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
	}
	
	// find elements by css selector
	function find($selector, $idx = null) {
		$selectors = $this->parse_selector($selector);
		if(($count = count($selectors)) === 0)
			return array();
		$found_keys = array();
		
		// find each selector
		for($c = 0; $c < $count; ++$c) {
			if(($levle = count($selectors[0])) === 0)
				return array();
			if(!isset($this->_[HDOM_INFO_BEGIN]))
				return array();
			
			$head = array($this->_[HDOM_INFO_BEGIN]=>1);
			
			// handle descendant selectors, no recursive!
			for($l = 0; $l < $levle; ++$l) {
				$ret = array();
				foreach($head as $k => $v) {
					$n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
					$n->seek($selectors[$c][$l], $ret);
				}
				$head = $ret;
			}
			
			foreach($head as $k => $v) {
				if(!isset($found_keys[$k]))
					$found_keys[$k] = 1;
			}
		}
		
		// sort keys
		ksort($found_keys);
		
		$found = array();
		foreach($found_keys as $k => $v)
			$found[] = $this->dom->nodes[$k];
			
		// return nth-element or array
		if(is_null($idx))
			return $found;
		else if($idx < 0)
			$idx = count($found) + $idx;
		return (isset($found[$idx])) ? $found[$idx] : null;
	}
	
	// seek for given conditions
	protected function seek($selector, &$ret) {
		list($tag, $key, $val, $exp, $no_key) = $selector;
		
		// xpath index
		if($tag && $key && is_numeric($key)) {
			$count = 0;
			foreach($this->children as $c) {
				if($tag === '*' || $tag === $c->tag) {
					if(++$count == $key) {
						$ret[$c->_[HDOM_INFO_BEGIN]] = 1;
						return;
					}
				}
			}
			return;
		}
		
		$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
		if($end == 0) {
			$parent = $this->parent;
			while(!isset($parent->_[HDOM_INFO_END]) && $parent !== null) {
				$end -= 1;
				$parent = $parent->parent;
			}
			$end += $parent->_[HDOM_INFO_END];
		}
		
		for($i = $this->_[HDOM_INFO_BEGIN] + 1; $i < $end; ++$i) {
			$node = $this->dom->nodes[$i];
			$pass = true;
			
			if($tag === '*' && !$key) {
				if(in_array($node, $this->children, true))
					$ret[$i] = 1;
				continue;
			}
			
			// compare tag
			if($tag && $tag != $node->tag && $tag !== '*') {
				$pass = false;
			}
			// compare key
			if($pass && $key) {
				if($no_key) {
					if(isset($node->attr[$key]))
						$pass = false;
				} else if(!isset($node->attr[$key]))
					$pass = false;
			}
			// compare value
			if($pass && $key && $val && $val !== '*') {
				$check = $this->match($exp, $val, $node->attr[$key]);
				// handle multiple class
				if(!$check && strcasecmp($key, 'class') === 0) {
					foreach(explode(' ', $node->attr[$key]) as $k) {
						$check = $this->match($exp, $val, $k);
						if($check)
							break;
					}
				}
				if(!$check)
					$pass = false;
			}
			if($pass)
				$ret[$i] = 1;
			unset($node);
		}
	}
	
	protected function match($exp, $pattern, $value) {
		switch($exp) {
			case '=':
				return ($value === $pattern);
			case '!=':
				return ($value !== $pattern);
			case '^=':
				return preg_match("/^" . preg_quote($pattern, '/') . "/", $value);
			case '$=':
				return preg_match("/" . preg_quote($pattern, '/') . "$/", $value);
			case '*=':
				if($pattern[0] == '/')
					return preg_match($pattern, $value);
				return preg_match("/" . $pattern . "/i", $value);
		}
		return false;
	}
	
	protected function parse_selector($selector_string) {
		// pattern of CSS selectors, modified from mootools
		$pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
		preg_match_all($pattern, trim($selector_string) . ' ', $matches, PREG_SET_ORDER);
		$selectors = array();
		$result = array();
		//print_r($matches);
		

		foreach($matches as $m) {
			$m[0] = trim($m[0]);
			if($m[0] === '' || $m[0] === '/' || $m[0] === '//')
				continue;
				// for borwser grnreated xpath
			if($m[1] === 'tbody')
				continue;
			
			list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false);
			if(!empty($m[2])) {
				$key = 'id';
				$val = $m[2];
			}
			if(!empty($m[3])) {
				$key = 'class';
				$val = $m[3];
			}
			if(!empty($m[4])) {
				$key = $m[4];
			}
			if(!empty($m[5])) {
				$exp = $m[5];
			}
			if(!empty($m[6])) {
				$val = $m[6];
			}
			
			// convert to lowercase
			if($this->dom->lowercase) {
				$tag = strtolower($tag);
				$key = strtolower($key);
			}
			//elements that do NOT have the specified attribute
			if(isset($key[0]) && $key[0] === '!') {
				$key = substr($key, 1);
				$no_key = true;
			}
			
			$result[] = array($tag, $key, $val, $exp, $no_key);
			if(trim($m[7]) === ',') {
				$selectors[] = $result;
				$result = array();
			}
		}
		if(count($result) > 0)
			$selectors[] = $result;
		return $selectors;
	}
	
	function __get($name) {
		if(isset($this->attr[$name]))
			return $this->attr[$name];
		switch($name) {
			case 'outertext':
				return $this->outertext();
			case 'innertext':
				return $this->innertext();
			case 'plaintext':
				return $this->text();
			case 'xmltext':
				return $this->xmltext();
			default:
				return array_key_exists($name, $this->attr);
		}
	}
	
	function __set($name, $value) {
		switch($name) {
			case 'outertext':
				return $this->_[HDOM_INFO_OUTER] = $value;
			case 'innertext':
				if(isset($this->_[HDOM_INFO_TEXT]))
					return $this->_[HDOM_INFO_TEXT] = $value;
				return $this->_[HDOM_INFO_INNER] = $value;
		}
		if(!isset($this->attr[$name])) {
			$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
			$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
		}
		$this->attr[$name] = $value;
	}
	
	function __isset($name) {
		switch($name) {
			case 'outertext':
				return true;
			case 'innertext':
				return true;
			case 'plaintext':
				return true;
		}
		//no value attr: nowrap, checked selected...
		return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
	}
	
	function __unset($name) {
		if(isset($this->attr[$name]))
			unset($this->attr[$name]);
	}
	
	// camel naming conventions
	function getAllAttributes() {
		return $this->attr;
	}
	function getAttribute($name) {
		return $this->__get($name);
	}
	function setAttribute($name, $value) {
		$this->__set($name, $value);
	}
	function hasAttribute($name) {
		return $this->__isset($name);
	}
	function removeAttribute($name) {
		$this->__set($name, null);
	}
	function getElementById($id) {
		return $this->find("#$id", 0);
	}
	function getElementsById($id, $idx = null) {
		return $this->find("#$id", $idx);
	}
	function getElementByTagName($name) {
		return $this->find($name, 0);
	}
	function getElementsByTagName($name, $idx = null) {
		return $this->find($name, $idx);
	}
	function parentNode() {
		return $this->parent();
	}
	function childNodes($idx = -1) {
		return $this->children($idx);
	}
	function firstChild() {
		return $this->first_child();
	}
	function lastChild() {
		return $this->last_child();
	}
	function nextSibling() {
		return $this->next_sibling();
	}
	function previousSibling() {
		return $this->prev_sibling();
	}
}

// simple html dom parser
// -----------------------------------------------------------------------------
class simple_html_dom {
	public $root = null;
	public $nodes = array();
	public $callback = null;
	public $lowercase = false;
	protected $pos;
	protected $doc;
	protected $char;
	protected $size;
	protected $cursor;
	protected $parent;
	protected $noise = array();
	protected $token_blank = " \t\r\n";
	protected $token_equal = ' =/>';
	protected $token_slash = " />\r\n\t";
	protected $token_attr = ' >';
	// use isset instead of in_array, performance boost about 30%...
	protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1);
	protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1);
	protected $optional_closing_tags = array('tr'=>array('tr'=>1, 'td'=>1, 'th'=>1), 'th'=>array('th'=>1), 'td'=>array('td'=>1), 'li'=>array('li'=>1), 'dt'=>array('dt'=>1, 'dd'=>1), 'dd'=>array('dd'=>1, 'dt'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'p'=>array('p'=>1), 'nobr'=>array('nobr'=>1));
	
	function __construct($str = null) {
		if($str) {
			if(preg_match("/^http:\/\//i", $str) || is_file($str))
				$this->load_file($str);
			else
				$this->load($str);
		}
	}
	
	function __destruct() {
		$this->clear();
	}
	
	// load html from string
	function load($str, $lowercase = true) {
		// prepare
		$this->prepare($str, $lowercase);
		// strip out comments
		$this->remove_noise("'<!--(.*?)-->'is");
		// strip out cdata
		$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
		// strip out <style> tags
		$this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
		$this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
		// strip out <script> tags
		$this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
		$this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
		// strip out preformatted tags
		$this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
		// strip out server side scripts
		$this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
		// strip smarty scripts
		$this->remove_noise("'(\{\w)(.*?)(\})'s", true);
		
		// parsing
		while($this->parse())
			;
			// end
		$this->root->_[HDOM_INFO_END] = $this->cursor;
	}
	
	// load html from file
	function load_file() {
		$args = func_get_args();
		$this->load(call_user_func_array('file_get_contents', $args), true);
	}
	
	// set callback function
	function set_callback($function_name) {
		$this->callback = $function_name;
	}
	
	// remove callback function
	function remove_callback() {
		$this->callback = null;
	}
	
	// save dom as string
	function save($filepath = '') {
		$ret = $this->root->innertext();
		if($filepath !== '')
			file_put_contents($filepath, $ret);
		return $ret;
	}
	
	// find dom node by css selector
	function find($selector, $idx = null) {
		return $this->root->find($selector, $idx);
	}
	
	// clean up memory due to php5 circular references memory leak...
	function clear() {
		foreach($this->nodes as $n) {
			$n->clear();
			$n = null;
		}
		if(isset($this->parent)) {
			$this->parent->clear();
			unset($this->parent);
		}
		if(isset($this->root)) {
			$this->root->clear();
			unset($this->root);
		}
		unset($this->doc);
		unset($this->noise);
	}
	
	function dump($show_attr = true) {
		$this->root->dump($show_attr);
	}
	
	// prepare HTML data and init everything
	protected function prepare($str, $lowercase = true) {
		$this->clear();
		$this->doc = $str;
		$this->pos = 0;
		$this->cursor = 1;
		$this->noise = array();
		$this->nodes = array();
		$this->lowercase = $lowercase;
		$this->root = new simple_html_dom_node($this);
		$this->root->tag = 'root';
		$this->root->_[HDOM_INFO_BEGIN] = -1;
		$this->root->nodetype = HDOM_TYPE_ROOT;
		$this->parent = $this->root;
		// set the length of content
		$this->size = strlen($str);
		if($this->size > 0)
			$this->char = $this->doc[0];
	}
	
	// parse html content
	protected function parse() {
		if(($s = $this->copy_until_char('<')) === '')
			return $this->read_tag();
			
		// text
		$node = new simple_html_dom_node($this);
		++$this->cursor;
		$node->_[HDOM_INFO_TEXT] = $s;
		$this->link_nodes($node, false);
		return true;
	}
	
	// read tag info
	protected function read_tag() {
		if($this->char !== '<') {
			$this->root->_[HDOM_INFO_END] = $this->cursor;
			return false;
		}
		$begin_tag_pos = $this->pos;
		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
		

		// end tag
		if($this->char === '/') {
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			$this->skip($this->token_blank_t);
			$tag = $this->copy_until_char('>');
			
			// skip attributes in end tag
			if(($pos = strpos($tag, ' ')) !== false)
				$tag = substr($tag, 0, $pos);
			
			$parent_lower = strtolower($this->parent->tag);
			$tag_lower = strtolower($tag);
			
			if($parent_lower !== $tag_lower) {
				if(isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
					$this->parent->_[HDOM_INFO_END] = 0;
					$org_parent = $this->parent;
					
					while(($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower)
						$this->parent = $this->parent->parent;
					
					if(strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore origonal parent
						if($this->parent->parent)
							$this->parent = $this->parent->parent;
						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} else if(($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
					$this->parent->_[HDOM_INFO_END] = 0;
					$org_parent = $this->parent;
					
					while(($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower)
						$this->parent = $this->parent->parent;
					
					if(strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore origonal parent
						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} else if(($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
					$this->parent->_[HDOM_INFO_END] = 0;
					$this->parent = $this->parent->parent;
				} else
					return $this->as_text_node($tag);
			}
			
			$this->parent->_[HDOM_INFO_END] = $this->cursor;
			if($this->parent->parent)
				$this->parent = $this->parent->parent;
			
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}
		
		$node = new simple_html_dom_node($this);
		$node->_[HDOM_INFO_BEGIN] = $this->cursor;
		++$this->cursor;
		$tag = $this->copy_until($this->token_slash);
		
		// doctype, cdata & comments...
		if(isset($tag[0]) && $tag[0] === '!') {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
			
			if(isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') {
				$node->nodetype = HDOM_TYPE_COMMENT;
				$node->tag = 'comment';
			} else {
				$node->nodetype = HDOM_TYPE_UNKNOWN;
				$node->tag = 'unknown';
			}
			
			if($this->char === '>')
				$node->_[HDOM_INFO_TEXT] .= '>';
			$this->link_nodes($node, true);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}
		
		// text
		if($pos = strpos($tag, '<') !== false) {
			$tag = '<' . substr($tag, 0, -1);
			$node->_[HDOM_INFO_TEXT] = $tag;
			$this->link_nodes($node, false);
			$this->char = $this->doc[--$this->pos]; // prev
			return true;
		}
		
		if(!preg_match("/^[\w-:]+$/", $tag)) {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
			if($this->char === '<') {
				$this->link_nodes($node, false);
				return true;
			}
			
			if($this->char === '>')
				$node->_[HDOM_INFO_TEXT] .= '>';
			$this->link_nodes($node, false);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}
		
		// begin tag
		$node->nodetype = HDOM_TYPE_ELEMENT;
		$tag_lower = strtolower($tag);
		$node->tag = ($this->lowercase) ? $tag_lower : $tag;
		
		// handle optional closing tags
		if(isset($this->optional_closing_tags[$tag_lower])) {
			while(isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
				$this->parent->_[HDOM_INFO_END] = 0;
				$this->parent = $this->parent->parent;
			}
			$node->parent = $this->parent;
		}
		
		$guard = 0; // prevent infinity loop
		$space = array($this->copy_skip($this->token_blank), '', '');
		
		// attributes
		do {
			if($this->char !== null && $space[0] === '')
				break;
			$name = $this->copy_until($this->token_equal);
			if($guard === $this->pos) {
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				continue;
			}
			$guard = $this->pos;
			
			// handle endless '<'
			if($this->pos >= $this->size - 1 && $this->char !== '>') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
				$node->tag = 'text';
				$this->link_nodes($node, false);
				return true;
			}
			
			// handle mismatch '<'
			if($this->doc[$this->pos - 1] == '<') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->tag = 'text';
				$node->attr = array();
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = substr($this->doc, $begin_tag_pos, $this->pos - $begin_tag_pos - 1);
				$this->pos -= 2;
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				$this->link_nodes($node, false);
				return true;
			}
			
			if($name !== '/' && $name !== '') {
				$space[1] = $this->copy_skip($this->token_blank);
				$name = $this->restore_noise($name);
				if($this->lowercase)
					$name = strtolower($name);
				if($this->char === '=') {
					$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
					$this->parse_attr($node, $name, $space);
				} else {
					//no value attr: nowrap, checked selected...
					$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
					$node->attr[$name] = true;
					if($this->char != '>')
						$this->char = $this->doc[--$this->pos]; // prev
				}
				$node->_[HDOM_INFO_SPACE][] = $space;
				$space = array($this->copy_skip($this->token_blank), '', '');
			} else
				break;
		} while($this->char !== '>' && $this->char !== '/');
		
		$this->link_nodes($node, true);
		$node->_[HDOM_INFO_ENDSPACE] = $space[0];
		
		// check self closing
		if($this->copy_until_char_escape('>') === '/') {
			$node->_[HDOM_INFO_ENDSPACE] .= '/';
			$node->_[HDOM_INFO_END] = 0;
		} else {
			// reset parent
			if(!isset($this->self_closing_tags[strtolower($node->tag)]))
				$this->parent = $node;
		}
		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
		return true;
	}
	
	// parse attributes
	protected function parse_attr($node, $name, &$space) {
		$space[2] = $this->copy_skip($this->token_blank);
		switch($this->char) {
			case '"':
				$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				$node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('"'));
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				break;
			case '\'':
				$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_SINGLE;
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				$node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('\''));
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				break;
			default:
				$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
				$node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
		}
	}
	
	// link node's parent
	protected function link_nodes(&$node, $is_child) {
		$node->parent = $this->parent;
		$this->parent->nodes[] = $node;
		if($is_child)
			$this->parent->children[] = $node;
	}
	
	// as a text node
	protected function as_text_node($tag) {
		$node = new simple_html_dom_node($this);
		++$this->cursor;
		$node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
		$this->link_nodes($node, false);
		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
		return true;
	}
	
	protected function skip($chars) {
		$this->pos += strspn($this->doc, $chars, $this->pos);
		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
	}
	
	protected function copy_skip($chars) {
		$pos = $this->pos;
		$len = strspn($this->doc, $chars, $pos);
		$this->pos += $len;
		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
		if($len === 0)
			return '';
		return substr($this->doc, $pos, $len);
	}
	
	protected function copy_until($chars) {
		$pos = $this->pos;
		$len = strcspn($this->doc, $chars, $pos);
		$this->pos += $len;
		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
		return substr($this->doc, $pos, $len);
	}
	
	protected function copy_until_char($char) {
		if($this->char === null)
			return '';
		
		if(($pos = strpos($this->doc, $char, $this->pos)) === false) {
			$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
			$this->char = null;
			$this->pos = $this->size;
			return $ret;
		}
		
		if($pos === $this->pos)
			return '';
		$pos_old = $this->pos;
		$this->char = $this->doc[$pos];
		$this->pos = $pos;
		return substr($this->doc, $pos_old, $pos - $pos_old);
	}
	
	protected function copy_until_char_escape($char) {
		if($this->char === null)
			return '';
		
		$start = $this->pos;
		while(1) {
			if(($pos = strpos($this->doc, $char, $start)) === false) {
				$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
				$this->char = null;
				$this->pos = $this->size;
				return $ret;
			}
			
			if($pos === $this->pos)
				return '';
			
			if($this->doc[$pos - 1] === '\\') {
				$start = $pos + 1;
				continue;
			}
			
			$pos_old = $this->pos;
			$this->char = $this->doc[$pos];
			$this->pos = $pos;
			return substr($this->doc, $pos_old, $pos - $pos_old);
		}
	}
	
	// remove noise from html content
	protected function remove_noise($pattern, $remove_tag = false) {
		$count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
		
		for($i = $count - 1; $i > -1; --$i) {
			$key = '___noise___' . sprintf('% 3d', count($this->noise) + 100);
			$idx = ($remove_tag) ? 0 : 1;
			$this->noise[$key] = $matches[$i][$idx][0];
			$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
		}
		
		// reset the length of content
		$this->size = strlen($this->doc);
		if($this->size > 0)
			$this->char = $this->doc[0];
	}
	
	// restore noise to html content
	function restore_noise($text) {
		while(($pos = strpos($text, '___noise___')) !== false) {
			$key = '___noise___' . $text[$pos + 11] . $text[$pos + 12] . $text[$pos + 13];
			if(isset($this->noise[$key]))
				$text = substr($text, 0, $pos) . $this->noise[$key] . substr($text, $pos + 14);
		}
		return $text;
	}
	
	function __toString() {
		return $this->root->innertext();
	}
	
	function __get($name) {
		switch($name) {
			case 'outertext':
				return $this->root->innertext();
			case 'innertext':
				return $this->root->innertext();
			case 'plaintext':
				return $this->root->text();
		}
	}
	
	// camel naming conventions
	function childNodes($idx = -1) {
		return $this->root->childNodes($idx);
	}
	function firstChild() {
		return $this->root->first_child();
	}
	function lastChild() {
		return $this->root->last_child();
	}
	function getElementById($id) {
		return $this->find("#$id", 0);
	}
	function getElementsById($id, $idx = null) {
		return $this->find("#$id", $idx);
	}
	function getElementByTagName($name) {
		return $this->find($name, 0);
	}
	function getElementsByTagName($name, $idx = -1) {
		return $this->find($name, $idx);
	}
	function loadFile() {
		$args = func_get_args();
		$this->load(call_user_func_array('file_get_contents', $args), true);
	}
}
?>